673205ed5fe93f907b2e3e7e77fd8f9d006b4bfb,java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java,SomaticMutationWalker,map,#RefMetaDataTracker#char#LocusContext#,177
Before Change
// at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
// be at least 6.3;
double tumorLod = t2.getAltVsRef(altAllele);
if (t2.qualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) {
if (OUTPUT_FAILURES) {
System.out.println("FAILED " + context.getContig() + ":" + context.getPosition() + " due to MAX MM QSCORE TEST." +
" LOD was " + tumorReadPile.getAltVsRef(altAllele) +
" LOD is now " + t2.getAltVsRef(altAllele) +
" QSUM was " + tumorReadPile.qualitySums.get(altAllele) +
" QSUM is now " + t2.qualitySums.get(altAllele));
}
continue;
}
// TODO: using the original pile here since often these artifacts will be supported
// by those reads that get thrown out! Maybe that means we don't need the noise filter...
boolean shouldDisalign =
disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
if (shouldDisalign) {
if (OUTPUT_FAILURES) {
System.out.println("FAILED " + context.getContig() + ":" + context.getPosition() + " due to DISALIGNMENT TEST.");
}
continue;
}
// (ii) the quality score sum for the mutant base in the normal must be < 50 and the
// LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3.
double normalLod = normalReadPile.getRefVsAlt(altAllele);
if ( normalReadPile.qualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD_THRESHOLD) {
continue;
}
// if we're still here... we've got a somatic mutation! Output the results
// and stop looking for mutants!
if (bedOutput) {
out.println(
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t" +
"TScore:" + tumorLod +
"__TRefSum: " + tumorReadPile.qualitySums.get(ref) +
"__TAltSum: " + tumorReadPile.qualitySums.get(altAllele) +
After Change
}
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
Map<Character, Integer> midp = new HashMap<Character, Integer>();
midp.put('A', Integer.MAX_VALUE);
midp.put('C', Integer.MAX_VALUE);
midp.put('G', Integer.MAX_VALUE);
midp.put('T', Integer.MAX_VALUE);
// First, exclude DBSNP Sites
for ( ReferenceOrderedDatum datum : tracker.getAllRods() )
{
if ( datum != null )
{
if ( datum instanceof rodDbSNP) {
// we're at a dbsnp site... move along... nothing to see here...
return -1;
}
}
}
char upRef = Character.toUpperCase(ref);
// TODO: should we be able to call mutations at bases where ref is N?
if (upRef == 'N') { return -1; }
List<SAMRecord> reads = context.getReads();
LocusReadPile tumorReadPile = new LocusReadPile(ref);
LocusReadPile normalReadPile = new LocusReadPile(ref);
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
if (read.getNotPrimaryAlignmentFlag() ||
read.getDuplicateReadFlag() ||
read.getReadUnmappedFlag() ||
read.getMappingQuality() <= 0
|| (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE))
) {
continue;
}
String rg = (String) read.getAttribute("RG");
String sample = read.getHeader().getReadGroup(rg).getSample();
int offset = context.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
if (base == 'N' || base == 'n') { continue; }
// TODO: build a pile of reads and offsets, then pass that into a
// constructor for the normalGL class
// that way, we can build a different pile of reads later on and extract the genotype
if (normalSampleName.equals(sample)) {
normalReadPile.add(read, offset);
} else if (tumorSampleName.equals(sample)) {
tumorReadPile.add(read, offset);
int midDist = Math.abs((int)(read.getReadLength() / 2) - offset);
if (midDist < midp.get(base)) { midp.put(base, midDist); }
} else {
throw new RuntimeException("Unknown Sample Name: " + sample);
}
}
// pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now
if (tumorReadPile.qualitySums.getOtherQualities(ref) < MIN_MUTANT_SUM_PRETEST) {
return -1;
}
// Test each of the poosible alternate alleles
for (char altAllele : new char[]{'A','C','G','T'}) {
if (altAllele == upRef) { continue; }
// (i) either an adjusted quality score sum in the tumor for the mutant base must be
// at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
// be at least 6.3;
int mutantSum = tumorReadPile.qualitySums.get(altAllele);
int refSum = tumorReadPile.qualitySums.get(ref);
if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD_THRESHOLD
// ||
// (mutantSum >= MIN_MUTANT_SUM && (float)mutantSum / (float) refSum >= 0.05f)
) {
// yep -- just fall through... easier to think about this way!
} else {
continue;
}
// make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle
boolean failedMidpointCheck = midp.get(altAllele) > 20;
// if (failedMidpointCheck) {
// out.println("Rejecting due to midpoint check!");
// return 0;
// }
// do a MSA to figure out if the alternate reads comes from a cluster of reads with more
// than one alternate allele (hints at being an alignment artifact)
ReferenceSequence refSeq;
// TODO: don't hardcode. Make this the max read length in the pile
long refStart = context.getLocation().getStart() - 150;
//tumorReadPile.offsets.get(0);
long refStop = context.getLocation().getStart() + 150;
try {
IndexedFastaSequenceFile seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
refSeq = seqFile.getSubsequenceAt(context.getContig(),refStart, refStop);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
// System.out.println("TESTING " + context.getContig() + ":" + context.getPosition());
LocusReadPile t2 = filterHighMismatchScoreReads(tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
// TEST the LOD score again!
// TODO: extract this since we'll do it multiple times...
// (i) either an adjusted quality score sum in the tumor for the mutant base must be
// at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
// be at least 6.3;
double tumorLod = t2.getAltVsRef(altAllele);
if (mode.equals("full") && t2.qualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) {
if (OUTPUT_FAILURES) {
String msg = "FAILED due to MAX MM QSCORE TEST." +
" LOD was " + tumorReadPile.getAltVsRef(altAllele) +
" LOD is now " + t2.getAltVsRef(altAllele) +
" QSUM was " + tumorReadPile.qualitySums.get(altAllele) +
" QSUM is now " + t2.qualitySums.get(altAllele);
System.out.println(
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t"
+ msg.replace(' ','_')
);
}
continue;
}
// TODO: using the original pile here since often these artifacts will be supported
// by those reads that get thrown out! Maybe that means we don't need the noise filter...
boolean shouldDisalign =
disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
if (mode.equals("full") && shouldDisalign) {
if (OUTPUT_FAILURES) {
String msg = "FAILED due to DISALIGNMENT TEST.";
System.out.println(
context.getContig() + "\t" +
context.getPosition() + "\t" +
context.getPosition() + "\t"
+ msg.replace(' ','_')
);